Metrics over replicates

Based on replicates of the mappings of RCTs across diseases, we will compute for each disease separately, and for each replicate:

  • Nb trials per region relevant to the disease
  • Nb trials per region relevant to the burden of diseases
  • Nb trials worldwide and in non-high-income countries relevant to the disease and to the burden
  • The same metrics for the number of patients

In [1]:
library(data.table)
library(foreach)
library(doParallel)
options(warn = 2)

# Load the RCT database. The columns used further down are TrialID, Regions,
# Nb_ctr_per_reg and Sample.
data <- read.table(
  file = "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/database_RCTs_regions_27diseases.txt"
)

# Load the lookup table translating category labels into disease-group names
# (Mgbd$x[d] is printed as the name of disease d).
Mgbd <- read.table(file = "../Data/27_gbd_groups.txt")


Loading required package: iterators
Loading required package: parallel

In [2]:
# Regions per trial ----
# `Regions` holds the "&"-separated list of regions attached to each trial.
RGs <- strsplit(as.character(data$Regions), "&")
regs <- sort(unique(unlist(RGs)))

# One logical column per region (unnamed, in the order of `regs`): TRUE when
# the trial lists that region. Region names are compared exactly against the
# split tokens instead of being used as regular expressions, so a name that is
# a substring of another one, or that contains a regex metacharacter, cannot
# produce a spurious match.
region_tokens <- unlist(RGs)
trial_of_token <- rep(seq_along(RGs), lengths(RGs))
LR <- lapply(regs, function(x) {
  seq_len(nrow(data)) %in% trial_of_token[which(region_tokens == x)]
})
LR <- do.call("cbind", LR)
LR <- data.table(LR)
LR$TrialID <- data$TrialID

# Number of patients per region per trial ----
# Discard sample sizes below 10 or above 200k
data$Sample[data$Sample < 10 | data$Sample > 200000] <- NA

# Number of countries per region per trial, used to distribute the sample size
# equally across countries
nb_ctrs <- lapply(strsplit(as.character(data$Nb_ctr_per_reg), "&"), as.numeric)

# Both "&"-separated columns must describe the same regions, entry by entry;
# otherwise the long table below would silently pair counts with the wrong
# region (or fail with an unrelated row-count error).
n_reg <- lengths(nb_ctrs)
stopifnot(identical(n_reg, lengths(RGs)))

# Long format: one row per (trial, region).
# Region is stored as a factor whose levels are all of `regs`: this is what
# data.frame() produced by default before R 4.0, and it makes any per-region
# tabulation keep regions with zero trials, whatever the R version.
pats <- data.frame(TrialID = rep(data$TrialID, n_reg),
                   Nb_ctrs = unlist(nb_ctrs),
                   Region = factor(region_tokens, levels = regs),
                   Tot_sample = rep(data$Sample, n_reg))

# Total number of countries of the trial, repeated on each of its rows
pats$tot_ctrs <- rep(vapply(nb_ctrs, sum, numeric(1)), n_reg)
# Share of the trial's sample size attributed to the region
pats$sample_per_reg <- pats$Tot_sample * pats$Nb_ctrs / pats$tot_ctrs
pats <- data.table(pats)
setkey(pats, TrialID)

Metrics for all diseases


In [3]:
# Metrics over replicates for the replicate folder "0" (all diseases pooled).
# For every replicate file, one row per region plus an "All" (worldwide) and a
# "Non-HI" (every region except "High-income") row is produced, holding the
# number of trials and of patients with recl_dis == 1.
d <- 0

tp0 <- proc.time()

# Replicate files of this folder
repl_dir <- paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/", d)
SMs <- list.files(repl_dir, pattern = "Reclassif")

cl <- makeCluster(4)
registerDoParallel(cl)

# tryCatch(finally = ) guarantees the workers are released even when a
# replicate fails (options(warn = 2) turns every warning into an error).
A <- tryCatch(
  foreach(k = SMs, .packages = "data.table") %dopar% {

    repl <- fread(file.path(repl_dir, k))
    # NOTE(review): assumes the rows of every replicate are in the same order
    # as the rows of `data` -- confirm against the code writing the replicates.
    repl$TrialID <- LR$TrialID
    setkey(repl, TrialID)
    replpats <- merge(pats, repl)
    setkey(replpats, Region)

    # Output data
    df <- data.table(Region = c(regs, "All", "Non-HI"), Dis = "all",
                     RCTs = 0L, Patients = 0)

    # (trial, region) rows relevant to GBD, and the non-high-income subset
    relevant <- replpats[recl_dis == 1, ]
    non_hi <- relevant[Region != "High-income", ]

    # Per region
    # Nb trials per region relevant to GBD. The counts are tabulated over the
    # full set of regions: a bare table() on a character column drops regions
    # without any trial, which breaks the row-by-row assignment.
    df[Dis == "all" & Region %in% regs,
       RCTs := as.integer(table(factor(relevant$Region, levels = regs)))]
    # Nb patients per region relevant to GBD (0 for regions without trials)
    df[Dis == "all" & Region %in% regs,
       Patients := relevant[regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1]

    # Worldwide
    # Nb trials worldwide relevant to GBD
    df[Dis == "all" & Region == "All", RCTs := sum(repl$recl_dis)]
    # Nb patients worldwide relevant to GBD
    df[Dis == "all" & Region == "All",
       Patients := sum(relevant$sample_per_reg, na.rm = TRUE)]

    # Non-HI countries
    # Nb trials in non-HI regions relevant to GBD (each trial counted once)
    df[Dis == "all" & Region == "Non-HI", RCTs := non_hi[!duplicated(TrialID), .N]]
    # Nb patients in non-HI regions relevant to GBD
    df[Dis == "all" & Region == "Non-HI",
       Patients := sum(non_hi$sample_per_reg, na.rm = TRUE)]

    # Explicit result of the iteration
    df
  },
  finally = stopCluster(cl)
)

# Replicates are stacked in the order of SMs (9 rows per replicate when there
# are 7 regions); the write.table format is kept as it was.
write.table(rbindlist(A), paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_", d, ".txt"), row.names = FALSE)

rm(A)

tp1 <- proc.time()
print("for all diseases, finished after (min):")
print((tp1 - tp0) / 60)


[1] "for all diseases, finished after (min):"
        user       system      elapsed 
 0.164250000  0.009216667 16.766750000 

For each disease


In [4]:
# Metrics over replicates, disease by disease.
# For every disease with enough replicates and for every replicate file, two
# sets of rows are produced (one row per region, plus "All" and "Non-HI"):
#   Dis == "dis": trials/patients with recl_dis == 1
#   Dis == "all": trials/patients with recl_dis + recl_oth >= 1
# NOTE(review): presumably recl_dis flags relevance to disease d and recl_oth
# relevance to another disease of the burden -- confirm with the replicate
# generation code.
dis <- 1:27

# Diseases with fewer replicate files than this are skipped
min_replicates <- 9000

t0 <- proc.time()

for (d in dis) {

  tp0 <- proc.time()
  # collapse = "" used to be passed to print() instead of paste(), which left
  # stray spaces in the message; paste0() builds it as intended.
  print(paste0("starting disease ", d, ": ", as.character(Mgbd$x[d])))

  repl_dir <- paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/", d)
  SMs <- list.files(repl_dir, pattern = "Reclassif")
  if (length(SMs) < min_replicates) {
    print(paste0("disease ", d, ": ", as.character(Mgbd$x[d]), " has only ", length(SMs), " replicates: we pass to next one"))
    next
  }

  cl <- makeCluster(4)
  registerDoParallel(cl)

  # tryCatch(finally = ) guarantees the workers are released even when a
  # replicate fails (options(warn = 2) turns every warning into an error).
  A <- tryCatch(
    foreach(k = SMs, .packages = "data.table") %dopar% {

      repl <- fread(file.path(repl_dir, k))
      # NOTE(review): assumes the rows of every replicate are in the same
      # order as the rows of `data` -- confirm.
      repl$TrialID <- LR$TrialID
      setkey(repl, TrialID)
      replpats <- merge(pats, repl)
      setkey(replpats, Region)

      # Output data: the region rows followed by "All" and "Non-HI", once for
      # "dis" and once for "all" (built explicitly rather than relying on
      # recycling and on a hard-coded number of regions).
      rows <- c(regs, "All", "Non-HI")
      df <- data.table(Region = rep(rows, times = 2),
                       Dis = rep(c("dis", "all"), each = length(rows)),
                       RCTs = 0L, Patients = 0)

      # (trial, region) rows retained for each value of Dis
      subsets <- list(dis = replpats[recl_dis == 1, ],
                      all = replpats[recl_dis + recl_oth >= 1, ])
      # Nb trials worldwide, counted on the replicate itself
      world_rcts <- list(dis = sum(repl$recl_dis),
                         all = sum(repl$recl_dis + repl$recl_oth >= 1))

      for (lab in names(subsets)) {
        rel <- subsets[[lab]]
        non_hi <- rel[Region != "High-income", ]

        # Per region
        # Nb trials per region. The counts are tabulated over the full set of
        # regions: a bare table() on a character column drops regions without
        # any trial, which breaks the row-by-row assignment.
        df[Dis == lab & Region %in% regs,
           RCTs := as.integer(table(factor(rel$Region, levels = regs)))]
        # Nb patients per region (0 for regions without trials)
        df[Dis == lab & Region %in% regs,
           Patients := rel[regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1]

        # Worldwide
        df[Dis == lab & Region == "All", RCTs := world_rcts[[lab]]]
        df[Dis == lab & Region == "All",
           Patients := sum(rel$sample_per_reg, na.rm = TRUE)]

        # Non-HI countries (each trial counted once)
        df[Dis == lab & Region == "Non-HI", RCTs := non_hi[!duplicated(TrialID), .N]]
        df[Dis == lab & Region == "Non-HI",
           Patients := sum(non_hi$sample_per_reg, na.rm = TRUE)]
      }

      # Explicit result of the iteration
      df
    },
    finally = stopCluster(cl)
  )

  fwrite(rbindlist(A), paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_", d, ".txt"))
  rm(A)

  tp1 <- proc.time()
  print(paste0("disease ", d, ": ", as.character(Mgbd$x[d]), " finished after (min):"))
  print((tp1 - tp0) / 60)
}

t1 <- proc.time()

print("total time (hrs):")
print((t1 - t0) / 3600)


[1] "starting disease  1 :  Tuberculosis"
[1] "disease 1: Tuberculosis finished after (min):"
      user     system    elapsed 
 0.1607667  0.0091500 22.0891333 
[1] "starting disease  2 :  HIV/AIDS"
[1] "disease 2: HIV/AIDS finished after (min):"
        user       system      elapsed 
 0.165116667  0.009366667 22.222183333 
[1] "starting disease  3 :  Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases"
[1] "disease 3: Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases finished after (min):"
      user     system    elapsed 
 0.1810667  0.0103000 22.8038000 
[1] "starting disease  4 :  Malaria"
[1] "disease 4: Malaria finished after (min):"
       user      system     elapsed 
 0.16256667  0.01113333 23.68710000 
[1] "starting disease  5 :  Neglected tropical diseases excluding malaria"
[1] "disease 5: Neglected tropical diseases excluding malaria finished after (min):"
        user       system      elapsed 
 0.173850000  0.009383333 23.937400000 
[1] "starting disease  6 :  Maternal disorders"
[1] "disease 6: Maternal disorders finished after (min):"
    user   system  elapsed 
 0.18010  0.00980 23.71605 
[1] "starting disease  7 :  Neonatal disorders"
[1] "disease 7: Neonatal disorders finished after (min):"
       user      system     elapsed 
 0.16901667  0.01043333 23.98530000 
[1] "starting disease  8 :  Nutritional deficiencies"
[1] "disease 8: Nutritional deficiencies finished after (min):"
       user      system     elapsed 
 0.16986667  0.01101667 24.30871667 
[1] "starting disease  9 :  Sexually transmitted diseases excluding HIV"
[1] "disease 9: Sexually transmitted diseases excluding HIV has only 8943 replicates: we pass to next one"
[1] "starting disease  10 :  Hepatitis"
[1] "disease 10: Hepatitis finished after (min):"
        user       system      elapsed 
 0.170100000  0.009433333 24.022366667 
[1] "starting disease  11 :  Leprosy"
[1] "disease 11: Leprosy has only 6834 replicates: we pass to next one"
[1] "starting disease  12 :  Neoplasms"
[1] "disease 12: Neoplasms finished after (min):"
        user       system      elapsed 
 0.173466667  0.009533333 24.732016667 
[1] "starting disease  13 :  Cardiovascular and circulatory diseases"
[1] "disease 13: Cardiovascular and circulatory diseases finished after (min):"
      user     system    elapsed 
 0.1801833  0.0102500 23.8997833 
[1] "starting disease  14 :  Chronic respiratory diseases"
[1] "disease 14: Chronic respiratory diseases finished after (min):"
       user      system     elapsed 
 0.17268333  0.01016667 24.03078333 
[1] "starting disease  15 :  Cirrhosis of the liver"
[1] "disease 15: Cirrhosis of the liver finished after (min):"
       user      system     elapsed 
 0.16278333  0.01041667 23.83195000 
[1] "starting disease  16 :  Digestive diseases (except cirrhosis)"
[1] "disease 16: Digestive diseases (except cirrhosis) finished after (min):"
        user       system      elapsed 
 0.172150000  0.007933333 24.103100000 
[1] "starting disease  17 :  Neurological disorders"
[1] "disease 17: Neurological disorders finished after (min):"
       user      system     elapsed 
 0.17835000  0.01056667 24.13151667 
[1] "starting disease  18 :  Mental and behavioral disorders"
[1] "disease 18: Mental and behavioral disorders finished after (min):"
       user      system     elapsed 
 0.16003333  0.01068333 24.13103333 
[1] "starting disease  19 :  Diabetes, urinary diseases and male infertility"
[1] "disease 19: Diabetes, urinary diseases and male infertility finished after (min):"
      user     system    elapsed 
 0.1655167  0.0090500 24.2729000 
[1] "starting disease  20 :  Gynecological diseases"
[1] "disease 20: Gynecological diseases finished after (min):"
    user   system  elapsed 
 0.16255  0.00965 23.91038 
[1] "starting disease  21 :  Hemoglobinopathies and hemolytic anemias"
[1] "disease 21: Hemoglobinopathies and hemolytic anemias has only 6680 replicates: we pass to next one"
[1] "starting disease  22 :  Musculoskeletal disorders"
[1] "disease 22: Musculoskeletal disorders finished after (min):"
      user     system    elapsed 
 0.1727167  0.0108000 24.2202667 
[1] "starting disease  23 :  Congenital anomalies"
[1] "disease 23: Congenital anomalies has only 8650 replicates: we pass to next one"
[1] "starting disease  24 :  Skin and subcutaneous diseases"
[1] "disease 24: Skin and subcutaneous diseases finished after (min):"
        user       system      elapsed 
 0.180083333  0.009416667 24.034700000 
[1] "starting disease  25 :  Sense organ diseases"
[1] "disease 25: Sense organ diseases finished after (min):"
      user     system    elapsed 
 0.1627333  0.0090000 23.6132167 
[1] "starting disease  26 :  Oral disorders"
[1] "disease 26: Oral disorders finished after (min):"
    user   system  elapsed 
 0.16495  0.00870 23.41402 
[1] "starting disease  27 :  Sudden infant death syndrome"
[1] "disease 27: Sudden infant death syndrome has only 755 replicates: we pass to next one"
[1] "total time (hrs):"
       user      system     elapsed 
0.062416944 0.003606944 8.718740000